# -*- coding: utf-8 -*-
'''
Created on Aug 12, 2012

@author: LONG HOANG GIANG
'''
import sys, os
import mechanize
import traceback
from urlparse import urljoin
sys.path.append(os.path.expanduser('/home5/vietcntt/longhoanggiang/python'))
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
from CrawlerLib import Http, commonlib, html2text
import datetime
import re
import simplejson as json
import sqlite3 as db
import gzip
import cStringIO as StringIO


def getListStory():
    """Walk the paginated story listing and print each story link.

    Starts from page 1 of the listing URL below. For every page it prints
    ``commonlib.stringify(item)`` for each ``a.subleft`` anchor found next to
    a ``dot.gif`` bullet image, then prints the number of "next page" arrow
    images found and follows the first one.

    The walk stops when a page has no ``next.gif`` image, when no usable
    link can be derived from it, or when the next URL has already been
    visited.

    Returns:
        None. All output goes to stdout.

    NOTE(review): assumes ``Http.getXMLTree`` returns an lxml-style tree whose
    elements support ``.xpath()`` and ``.get()`` -- confirm against CrawlerLib.
    """
    url = 'http://vanhoc.xitrum.net/truyencotich/vietnam/trang/1.html'
    # Remember every page fetched so a "next" link that points back to an
    # earlier page cannot keep the loop spinning forever.
    visited = set([url])
    while True:
        tree = Http.getXMLTree(url)
        for item in tree.xpath("//img[@src='/images/dot.gif']/../following-sibling::*[1]//a[@class='subleft']"):
            print(commonlib.stringify(item))

        next_imgs = tree.xpath("//img[contains(@src, 'next.gif')]")
        print(len(next_imgs))
        if not next_imgs:
            break
        next_img = next_imgs[0]

        # The matched node is an <img>, which normally carries no href of its
        # own; the link target is expected on the nearest enclosing <a>.
        href = next_img.get('href')
        if href is None:
            anchors = next_img.xpath("ancestor::a[@href][1]")
            if not anchors:
                break
            href = anchors[0].get('href')

        link = commonlib.normalize_str(href)
        if link == '':
            break

        next_url = urljoin(url, link)
        if next_url in visited:
            break
        visited.add(next_url)
        url = next_url
        

# Kick off the crawl. The call is not wrapped in an
# ``if __name__ == '__main__':`` guard, so it runs whenever this module is
# imported as well as when the file is executed as a script.
getListStory()